import random
import csv
import pandas as pd


def create_ratings(input_file, output_file, *, ratings_per_movie=100, max_user_id=1000):
    """Generate a random user-rating dataset for the movies listed in a CSV file.

    The first row of ``input_file`` is treated as a title row and skipped. For
    every remaining non-empty row the first column is used as the movie id
    (any further columns are ignored), and ``ratings_per_movie`` random
    (user, rating) pairs are drawn for it. A user who has already rated a
    movie is not written a second time, so a movie may receive fewer than
    ``ratings_per_movie`` rows.

    The output begins with the header ``user,movie_id,rating`` so that it can
    be loaded by column name; ``sort_ratings`` sorts on the ``user`` column.

    Args:
        input_file: Path of the UTF-8 movie CSV (title row first, movie id in
            the first column).
        output_file: Path of the ratings CSV to create; overwritten if present.
        ratings_per_movie: Number of random draws made per movie row.
        max_user_id: Upper bound (inclusive) of the user ids, drawn from
            ``0..max_user_id``.
    """
    rated_by = {}  # movie id -> set of users that already rated that movie
    # newline='' is what the csv module expects so that quoted fields and line
    # endings are handled by the reader/writer rather than by the file object.
    with open(input_file, 'r', encoding='utf-8', newline='') as f_in, \
            open(output_file, 'w', encoding='utf-8', newline='') as f_out:
        reader = csv.reader(f_in)
        writer = csv.writer(f_out, lineterminator='\n')
        writer.writerow(['user', 'movie_id', 'rating'])

        next(reader, None)  # drop the title row of the movie file
        for row in reader:
            if not row:  # blank line in the input
                continue
            movie_id = row[0]
            seen_users = rated_by.setdefault(movie_id, set())
            for _ in range(ratings_per_movie):
                # Both values are drawn on every iteration, even when the
                # user turns out to be a duplicate and the pair is discarded.
                user = random.randint(0, max_user_id)
                rating = random.randint(1, 5)
                if user in seen_users:
                    continue
                seen_users.add(user)
                writer.writerow([user, movie_id, rating])

def sort_ratings(input_file, output_file):
    """Write a copy of a ratings CSV with its rows ordered by ascending user.

    Args:
        input_file: Path of a CSV file whose header contains a ``user`` column.
        output_file: Path of the sorted CSV to create; written without the
            DataFrame index.

    Raises:
        KeyError: If the input file has no ``user`` column.
    """
    ratings = pd.read_csv(input_file)
    # mergesort is stable: rows that share a user keep their original file
    # order, so the output is reproducible (the default quicksort is not).
    ordered = ratings.sort_values(by="user", ascending=True, kind="mergesort")
    # Write the reordered data to the new file.
    ordered.to_csv(output_file, index=False)

if __name__ == "__main__":
    # Script entry point: generate the raw ratings from the movie list, then
    # write a second copy of them ordered by user.
    raw_ratings_path = 'ratings.csv'
    create_ratings('movies.csv', raw_ratings_path)
    sort_ratings(raw_ratings_path, 'ratings_sorted.csv')